[Instcombine]: `llvm.ucmp` and `llvm.scmp` recognition #168505

kper · 2025-11-18T09:16:46Z

Created a pattern to recognize llvm.ucmp and llvm.scmp.
Alive Proof: https://alive2.llvm.org/ce/z/BYRyu-

Closes #166579

llvmbot · 2025-11-18T09:17:22Z

@llvm/pr-subscribers-llvm-transforms

Author: None (kper)

Changes

Created a pattern to recognize llvm.ucmp and llvm.scmp.
Alive Proof: https://alive2.llvm.org/ce/z/BYRyu-

Closes #166579

Full diff: https://github.com/llvm/llvm-project/pull/168505.diff

2 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (+45)
(modified) llvm/test/Transforms/InstCombine/select-cmp.ll (+114)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 9572f9d702e1b..5c8008700e181 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1955,6 +1955,48 @@ static Instruction *foldSelectICmpEq(SelectInst &SI, ICmpInst *ICI,
   return nullptr;
 }
 
+/// Recognize a three-way comparison written as nested selects and rewrite
+/// the inner select as a call to the llvm.ucmp / llvm.scmp intrinsic:
+///
+///   select(icmp(eq, X, Y), Z, select(icmp(ult, X, Y), -1, 1))
+///     --> select(icmp(eq, X, Y), Z, llvm.ucmp(freeze(X), freeze(Y)))
+///
+/// An inner `slt` compare selects llvm.scmp instead. When Z (TrueVal) is the
+/// constant zero, the bare intrinsic call on X and Y is returned without an
+/// outer select. Returns nullptr when the pattern does not match.
+static Value *foldSelectToInstrincCmp(SelectInst &SI, const ICmpInst *ICI,
+                                      Value *TrueVal, Value *FalseVal,
+                                      InstCombiner::BuilderTy &Builder) {
+  ICmpInst::Predicate Pred = ICI->getPredicate();
+
+  if (Pred != ICmpInst::ICMP_EQ) // only an outer equality compare is handled
+    return nullptr;
+
+  CmpPredicate IPred; // predicate of the inner compare, bound by the match
+  if (match(FalseVal, m_Select(m_ICmp(IPred, m_Specific(ICI->getOperand(0)),
+                                      m_Specific(ICI->getOperand(1))),
+                               m_AllOnes(), m_One())) && // arms are -1 and 1
+      (IPred == ICmpInst::ICMP_ULT || IPred == ICmpInst::ICMP_SLT)) {
+    Value *X = ICI->getOperand(0); // NOTE(review): operand type is not checked
+    Value *Y = ICI->getOperand(1);
+    Builder.SetInsertPoint(&SI); // new instructions go right before SI
+    auto IID = IPred == ICmpInst::ICMP_ULT ? Intrinsic::ucmp : Intrinsic::scmp;
+
+    // Edge case: if Z is the constant 0, the outer select can be folded
+    // away, leaving just the intrinsic comparison (operands are not frozen).
+    if (match(TrueVal, m_Zero()))
+      return Builder.CreateIntrinsic(SI.getType(), IID, {X, Y});
+
+    Value *FrozenX = Builder.CreateFreeze(X, X->getName() + ".frz");
+    Value *FrozenY = Builder.CreateFreeze(Y, Y->getName() + ".frz");
+    Value *Cmp = // NOTE(review): result type comes from the operand, not SI
+        Builder.CreateIntrinsic(FrozenX->getType(), IID, {FrozenX, FrozenY});
+    return Builder.CreateSelect(SI.getCondition(), TrueVal, Cmp, "select.ucmp");
+  }
+
+  return nullptr;
+}
+
 /// Fold `X Pred C1 ? X BOp C2 : C1 BOp C2` to `min/max(X, C1) BOp C2`.
 /// This allows for better canonicalization.
 Value *InstCombinerImpl::foldSelectWithConstOpToBinOp(ICmpInst *Cmp,
@@ -2186,6 +2228,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
   if (Value *V = foldSelectWithConstOpToBinOp(ICI, TrueVal, FalseVal))
     return replaceInstUsesWith(SI, V);
 
+  if (Value *V = foldSelectToInstrincCmp(SI, ICI, TrueVal, FalseVal, Builder))
+    return replaceInstUsesWith(SI, V);
+
   return Changed ? &SI : nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/select-cmp.ll b/llvm/test/Transforms/InstCombine/select-cmp.ll
index b1bd7a0ecc8ac..bf1a6cb047c37 100644
--- a/llvm/test/Transforms/InstCombine/select-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/select-cmp.ll
@@ -808,5 +808,119 @@ define i1 @icmp_lt_slt(i1 %c, i32 %arg) {
   ret i1 %select
 }
 
+define i16 @icmp_fold_to_llvm_ucmp_when_eq(i16 %x, i16 %y) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_when_eq(
+; CHECK-NEXT:    [[Y_FRZ:%.*]] = freeze i16 [[Y:%.*]]
+; CHECK-NEXT:    [[X_FRZ:%.*]] = freeze i16 [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i16 [[X_FRZ]], [[Y_FRZ]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.ucmp.i16.i16(i16 [[X_FRZ]], i16 [[Y_FRZ]])
+; CHECK-NEXT:    [[SELECT_UCMP:%.*]] = select i1 [[TMP1]], i16 42, i16 [[TMP2]]
+; CHECK-NEXT:    ret i16 [[SELECT_UCMP]]
+;
+  %3 = icmp eq i16 %x, %y ; outer equality test
+  %4 = icmp ult i16 %x, %y ; unsigned less-than on the same operands
+  %5 = select i1 %4, i16 -1, i16 1 ; -1 when x < y, otherwise 1
+  %6 = select i1 %3, i16 42, i16 %5 ; non-zero constant on equality: outer select is kept
+  ret i16 %6
+}
+
+define i16 @icmp_fold_to_llvm_ucmp_when_ult_and_Z_zero(i16 %x, i16 %y) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_when_ult_and_Z_zero(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.ucmp.i16.i16(i16 [[X:%.*]], i16 [[Y:%.*]])
+; CHECK-NEXT:    ret i16 [[TMP1]]
+;
+  %3 = icmp eq i16 %x, %y ; outer equality test
+  %4 = icmp ult i16 %x, %y ; unsigned less-than on the same operands
+  %5 = select i1 %4, i16 -1, i16 1 ; -1 when x < y, otherwise 1
+  %6 = select i1 %3, i16 0, i16 %5 ; zero on equality: whole pattern becomes a bare llvm.ucmp
+  ret i16 %6
+}
+
+define i16 @icmp_fold_to_llvm_ucmp_when_slt_and_Z_zero(i16 %x, i16 %y) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_when_slt_and_Z_zero(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.scmp.i16.i16(i16 [[X:%.*]], i16 [[Y:%.*]])
+; CHECK-NEXT:    ret i16 [[TMP1]]
+;
+  %3 = icmp eq i16 %x, %y ; outer equality test
+  %4 = icmp slt i16 %x, %y ; signed less-than: llvm.scmp is expected despite "ucmp" in the test name
+  %5 = select i1 %4, i16 -1, i16 1 ; -1 when x < y, otherwise 1
+  %6 = select i1 %3, i16 0, i16 %5 ; zero on equality: whole pattern becomes a bare intrinsic call
+  ret i16 %6
+}
+
+define i16 @icmp_fold_to_llvm_ucmp_when_cmp_slt(i16 %x, i16 %y) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_when_cmp_slt(
+; CHECK-NEXT:    [[Y_FRZ:%.*]] = freeze i16 [[Y:%.*]]
+; CHECK-NEXT:    [[X_FRZ:%.*]] = freeze i16 [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i16 [[X_FRZ]], [[Y_FRZ]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.scmp.i16.i16(i16 [[X_FRZ]], i16 [[Y_FRZ]])
+; CHECK-NEXT:    [[SELECT_UCMP:%.*]] = select i1 [[TMP1]], i16 42, i16 [[TMP2]]
+; CHECK-NEXT:    ret i16 [[SELECT_UCMP]]
+;
+  %3 = icmp eq i16 %x, %y ; outer equality test
+  %4 = icmp slt i16 %x, %y ; signed variant ("slt" instead of "ult"): llvm.scmp is expected
+  %5 = select i1 %4, i16 -1, i16 1 ; -1 when x < y, otherwise 1
+  %6 = select i1 %3, i16 42, i16 %5 ; non-zero constant on equality: outer select is kept
+  ret i16 %6
+}
+
+define i16 @icmp_fold_to_llvm_ucmp_when_value(i16 %x, i16 %y, i16 %Z) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_when_value(
+; CHECK-NEXT:    [[Y_FRZ:%.*]] = freeze i16 [[Y:%.*]]
+; CHECK-NEXT:    [[X_FRZ:%.*]] = freeze i16 [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i16 [[X_FRZ]], [[Y_FRZ]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.ucmp.i16.i16(i16 [[X_FRZ]], i16 [[Y_FRZ]])
+; CHECK-NEXT:    [[SELECT_UCMP:%.*]] = select i1 [[TMP1]], i16 [[Z:%.*]], i16 [[TMP2]]
+; CHECK-NEXT:    ret i16 [[SELECT_UCMP]]
+;
+  %3 = icmp eq i16 %x, %y ; outer equality test
+  %4 = icmp ult i16 %x, %y ; unsigned less-than on the same operands
+  %5 = select i1 %4, i16 -1, i16 1 ; -1 when x < y, otherwise 1
+  %6 = select i1 %3, i16 %Z, i16 %5 ; equality arm is a function argument rather than a constant
+  ret i16 %6
+}
+
+define i16 @icmp_fold_to_llvm_ucmp_when_ne(i16 %x, i16 %y) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_when_ne(
+; CHECK-NEXT:    [[Y_FRZ:%.*]] = freeze i16 [[Y:%.*]]
+; CHECK-NEXT:    [[X_FRZ:%.*]] = freeze i16 [[X:%.*]]
+; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i16 [[X_FRZ]], [[Y_FRZ]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.ucmp.i16.i16(i16 [[X_FRZ]], i16 [[Y_FRZ]])
+; CHECK-NEXT:    [[SELECT_UCMP:%.*]] = select i1 [[DOTNOT]], i16 42, i16 [[TMP1]]
+; CHECK-NEXT:    ret i16 [[SELECT_UCMP]]
+;
+  %3 = icmp ne i16 %x, %y ; outer test written as "ne" instead of "eq"
+  %4 = icmp ult i16 %x, %y ; unsigned less-than on the same operands
+  %5 = select i1 %4, i16 -1, i16 1 ; -1 when x < y, otherwise 1
+  %6 = select i1 %3, i16 %5, i16 42 ; arms swapped relative to the "eq" form; expected output matches it
+  ret i16 %6
+}
+
+define i16 @icmp_fold_to_llvm_ucmp_negative_test_invalid_constant_1(i16 %x, i16 %y, i16 %Z) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_negative_test_invalid_constant_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[Z:%.*]], i16 1
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
+  %3 = icmp eq i16 %x, %y ; outer equality test
+  %4 = icmp ult i16 %x, %y ; unsigned less-than on the same operands
+  %5 = select i1 %4, i16 1, i16 1 ; invalid constant: true arm is 1 instead of -1, so no intrinsic is expected
+  %6 = select i1 %3, i16 %Z, i16 %5 ; expected output keeps a plain select with the constant 1
+  ret i16 %6
+}
+
+define i16 @icmp_fold_to_llvm_ucmp_negative_test_invalid_constant_2(i16 %x, i16 %y, i16 %Z) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_negative_test_invalid_constant_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[Z:%.*]], i16 -1
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
+  %3 = icmp eq i16 %x, %y ; outer equality test
+  %4 = icmp ult i16 %x, %y ; unsigned less-than on the same operands
+  %5 = select i1 %4, i16 -1, i16 -1 ; invalid constant: false arm is -1 instead of 1, so no intrinsic is expected
+  %6 = select i1 %3, i16 %Z, i16 %5 ; expected output keeps a plain select with the constant -1
+  ret i16 %6
+}
+
 declare void @use(i1)
 declare void @use.i8(i8)

kper · 2025-11-18T09:22:41Z

@dtcxzyw could you run the benchmarks to see whether this transformation is profitable?

github-actions · 2025-11-18T10:03:05Z

🐧 Linux x64 Test Results

186436 tests passed
4864 tests skipped

kper · 2025-11-20T08:04:37Z

@dtcxzyw thanks, I fixed two bugs. First, the return type of the intrinsic wasn't correct. Second, the fold must not be applied to pointer comparisons.
Would you please rerun the benchmark? Thank you!

…X, Y)

kper · 2025-11-21T07:40:04Z

The last commit added another transformation to handle cmp4. While it compiles exactly as mentioned in the issue, it is unfortunate that it didn't have any effect on the benchmarks.
That's why I would like to know your opinion on whether this PR is even worth merging. @nikic @dtcxzyw

dtcxzyw

For the first pattern, s/ucmp is less profitable than select + icmp because the former needs two icmps plus either two selects or one sub (see also TargetLowering::expandCMP). It doesn't simplify the IR, since X and Y still have two uses after the transformation.

For the second pattern, select(icmp(eq, X, Y), 0, llvm.cmp(X, Y)) -> llvm.cmp(X, Y), absorbing the equality test into s/ucmp is interesting. Unfortunately, we haven't seen it occur in real-world programs.

kper added 3 commits November 18, 2025 09:00

[InstCombine]: Fold select into llvm.ucmp or llvm.scmp

899d753

[InstCombine]: Handle edge case when Z is zero

ae64545

[InstCombine]: Fixed condition

0347b82

kper requested a review from nikic as a code owner November 18, 2025 09:16

llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Nov 18, 2025

dtcxzyw mentioned this pull request Nov 18, 2025

Task submission dtcxzyw/llvm-opt-benchmark#1312

Open

zyw-bot mentioned this pull request Nov 18, 2025

pre-commit: PR168505 dtcxzyw/llvm-opt-benchmark#3075

Open

kper added 2 commits November 19, 2025 20:52

[InstCombine]: Fixed type

fedb388

[InstCombine]: Don't apply fold when ptr

239dcbd

zyw-bot mentioned this pull request Nov 20, 2025

pre-commit: PR168505 dtcxzyw/llvm-opt-benchmark#3079

Closed

[InstCombine]: select(icmp(eq, X, Y), 0, llvm.cmp(X, Y)) -> llvm.cmp(…

aaac1a1

…X, Y)

dtcxzyw reviewed Nov 21, 2025

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[Instcombine]: `llvm.ucmp` and `llvm.scmp` recognition #168505

[Instcombine]: `llvm.ucmp` and `llvm.scmp` recognition #168505

kper commented Nov 18, 2025

Uh oh!

llvmbot commented Nov 18, 2025

Uh oh!

kper commented Nov 18, 2025

Uh oh!

github-actions bot commented Nov 18, 2025 •

edited

Loading

Uh oh!

kper commented Nov 20, 2025

Uh oh!

kper commented Nov 21, 2025

Uh oh!

dtcxzyw left a comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

[Instcombine]: llvm.ucmp and llvm.scmp recognition #168505

Are you sure you want to change the base?

[Instcombine]: llvm.ucmp and llvm.scmp recognition #168505

Conversation

kper commented Nov 18, 2025

Uh oh!

llvmbot commented Nov 18, 2025

Uh oh!

kper commented Nov 18, 2025

Uh oh!

github-actions bot commented Nov 18, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

🐧 Linux x64 Test Results

Uh oh!

kper commented Nov 20, 2025

Uh oh!

kper commented Nov 21, 2025

Uh oh!

dtcxzyw left a comment

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

[Instcombine]: `llvm.ucmp` and `llvm.scmp` recognition #168505

[Instcombine]: `llvm.ucmp` and `llvm.scmp` recognition #168505

github-actions bot commented Nov 18, 2025 •

edited

Loading